這篇是 使用 Google Docs 打造 Wep App Engine 的延續,
目的是做個實例,讓大家能夠更瞭解它的運作方式!
以下是實作[忍の道徽章排行]的思路:
1.首先從 這裡 分析所有參加忍の道活動的ID、暱稱,存進 FusionTables。
2.再透過 http://ithelp.ithome.com.tw/js-ninja/sticker-badges/user/ID 統計每個人的徽章數。
3.最後在 Web App 上利用 Visualization 顯示徽章排行榜!
再來是 Code 的部份:
1.SpreadSheet (Google 試算表) 裡的 Google app script :
// Table public id
var tableID = "3707276";
// Account
var email = UserProperties.getProperty('email');
var password = UserProperties.getProperty('password');
if (email === null || password === null) {
email = Browser.inputBox('Enter email');
password = Browser.inputBox('Enter password');
UserProperties.setProperty('email',email);
UserProperties.setProperty('password', password);
};
// Google Auth Token
var authToken = getGAauthenticationToken(email,password);
// Google Auth
function getGAauthenticationToken(email, password) {
password = encodeURIComponent(password);
var response = UrlFetchApp.fetch("https://www.google.com/accounts/ClientLogin", {
method: "post",
payload: "accountType=GOOGLE&Email=" + email + "&Passwd=" + password + "&service=fusiontables&Source=js-ninja"
});
var responseStr = response.getContentText();
responseStr = responseStr.slice(responseStr.search("Auth=") + 5, responseStr.length);
responseStr = responseStr.replace(/\n/g, "");
return responseStr;
};
// 使用者名單
function JS_List() {
for (var i=1 ; i <= 24 ; i++) {
var URL = "http://ithelp.ithome.com.tw/js-ninja/hero-list/page/" + i + "/level/1";
var response = UrlFetchApp.fetch(URL);
var code = response.getResponseCode();
if (code == 200) {
var html = response.getContentText("UTF-8");
var list = html.split('hero_list_bg');
for (var j=1; j < list.length; j++) {
var id = substr(0,list[j],'id=','"');
var name = substr(0,list[j],'left">',' ');
// 新增
var SQL = "INSERT INTO " + tableID + " ('ID','Name') VALUES ('" + id + "','" + name + "');";
UrlFetchApp.fetch("http://www.google.com/fusiontables/api/query", {
method: "POST",
payload : {"sql" : SQL},
headers: {"Authorization": "GoogleLogin auth=" + authToken,}
});
}
}
}
}
// 計算徽章數
function Badge() {
// 查詢
var SQL = "SELECT rowid,ID FROM " + tableID;
var SQLresponse = UrlFetchApp.fetch("http://www.google.com/fusiontables/api/query", {
method: "POST",
payload : {"sql" : SQL},
headers: {"Authorization": "GoogleLogin auth=" + authToken,}
});
var tableData = SQLresponse.getContentText();
var resParse = Utilities.parseCsv(tableData);
for (var i=1; i < resParse.length; i++) {
var id = resParse[i][1];
var URL = "http://ithelp.ithome.com.tw/js-ninja/sticker-badges/user/" + id;
var response = UrlFetchApp.fetch(URL);
var code = response.getResponseCode();
if (code == 200) {
var html = response.getContentText("UTF-8");
var total = html.split('icon1').length-1;
// 更新
var SQL = "UPDATE " + tableID + " SET 'Total' = '" + total + "' WHERE rowid = '"+ resParse[i][0] +"';";
UrlFetchApp.fetch("http://www.google.com/fusiontables/api/query", {
method: "POST",
payload : {"sql" : SQL},
headers: {"Authorization": "GoogleLogin auth=" + authToken,}
});
}
}
};
// 字串擷取
function substr(index,data,star,end){
var x = data.indexOf(star,index);
var y = data.indexOf(end,x);
return data.substring(x+star.length,y);
};
2.接著將 FusionTables 設為公開!(點連結進去可以看到原始資料)
3.最後用 Visualization 從 FusionTables 抓資料!(完整範例)
google.load('visualization', '1');
function drawTable() {
// Construct query
var query = "SELECT 'Name' as '暱稱', 'Total' as '徽章數' FROM 3707276";
var team = document.getElementById('team').value;
if (team) {
query += " WHERE 'Total' >= '" + team + "'";
}
var queryText = encodeURIComponent(query);
var gvizQuery = new google.visualization.Query(
'http://www.google.com/fusiontables/gvizdata?tq=' + queryText);
// Send query and draw table with data in response
gvizQuery.send(function(response) {
var numRows = response.getDataTable().getNumberOfRows();
var numCols = response.getDataTable().getNumberOfColumns();
var ftdata = [''];
for (var i = 0; i < numCols; i++) {
var columnTitle = response.getDataTable().getColumnLabel(i);
ftdata.push('');
}
ftdata.push('');
for (var i = 0; i < numRows; i++) {
ftdata.push('');
for(var j = 0; j < numCols; j++) {
var rowValue = response.getDataTable().getValue(i, j);
ftdata.push('');
}
ftdata.push('');
}
ftdata.push('<table><thead>
<tr><th>' + columnTitle + '</th></tr>
</thead><tbody>
<tr><td>' + rowValue + '</td></tr>
</tbody></table>
');
document.getElementById('ft-data').innerHTML = ftdata.join('');
});
}
google.setOnLoadCallback(drawTable);
其實這篇和 YQL 都可以視為 Javascript 跨網域的解決方案,
但 YQL 比較適合單一網頁,例如用 jQuery + YQL 示範的文章清單!
而這篇的應用對於抓取大量網頁非常有優勢,
根據我的經驗,在限制的執行時間內 Google 能抓取的網頁數量約為 150 個左右!
超過 150 頁的話,就得用其他技巧讓它分批執行了。